NOTES 1. Need to revise the flora heatmap to actually work 2. Then knit so a reviewer can look at it

This is a template to make your visual inspection script. It does not have to be an R Markdown, but I like them, especially the knit function, so I can send the plots to coauthors. Make sure all of the columns get plotted, even ones you think might not be that relevant.

R Markdown Guide

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document.

10JAN24 Notes for next year:

  1. We want to back-apply the L1 generation script to raw historical data also
  2. To do this, you will need to go back to previous versions of the script found in Scripts -> FluoroProbe_aggregation.R
  3. Update the maintenance log to include any date-specific QAQC that occurred in R for 2014-2022 that is not already included in maintenance log, based on historic versions of the FluoroProbe_aggregation.R file over time
  4. Add the qaqc function to the visual inspection script so that all the raw data from all years are read in, combined, and put through the qaqc function
# Read in the combined 2014-2023 FluoroProbe L1 file for visual inspection.
# Path is relative to the knit directory; the captured spec below shows the
# expected columns (1 chr Reservoir, 31 dbl, 1 dttm DateTime).
current_df <- read_csv("./FluoroProbe_2014_2023.csv")
## Rows: 110289 Columns: 33
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr   (1): Reservoir
## dbl  (31): Site, CastID, Depth_m, GreenAlgae_ugL, Bluegreens_ugL, BrownAlgae...
## dttm  (1): DateTime
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

This section checks to make sure each observation has a data flag. It also checks to make sure the frequency of flags match what we expect to see.

# Make sure there are no NAs in the Flag columns
Flags <- current_df %>%
  select(DateTime, starts_with("Flag"))

# Keep only the INCOMPLETE rows (any NA flag); this should be empty.
# (The old comment said "complete rows", which was the opposite of what
# !complete.cases() selects.)
RowsNA <- Flags[!complete.cases(Flags), ]

# Check the flag columns
Flags <- current_df %>%
  select(starts_with("Flag"))

# Make a table with the number of times each flag value was used;
# useNA = "always" prints the NA count even when it is zero
for (f in seq_len(ncol(Flags))) {
  #print(colnames(Flags[f]))
  print(table(Flags[, f], useNA = "always"))
}
## Flag_GreenAlgae_ugL
##     0     1     3  <NA> 
## 72935 18706 18648     0 
## Flag_Bluegreens_ugL
##     0     1     3  <NA> 
## 72935 18706 18648     0 
## Flag_BrownAlgae_ugL
##     0     1     3  <NA> 
## 72935 18706 18648     0 
## Flag_MixedAlgae_ugL
##     0     1     3  <NA> 
## 72935 18706 18648     0 
## Flag_YellowSubstances_ugL
##     0     1     3  <NA> 
## 72935 18706 18648     0 
## Flag_TotalConc_ugL
##     0     1     3  <NA> 
## 72935 18706 18648     0 
## Flag_Temp_C
##     0     2  <NA> 
## 67309 42980     0 
## Flag_Transmission_perc
##     0     1     2     3  <NA> 
## 69085 18706  3877 18621     0 
## Flag_RFU_525nm
##     0     3  <NA> 
## 80974 29315     0 
## Flag_RFU_570nm
##     0     3  <NA> 
## 80974 29315     0 
## Flag_RFU_610nm
##     0     3  <NA> 
## 80974 29315     0 
## Flag_RFU_370nm
##     0     1     3  <NA> 
## 72935 18706 18648     0 
## Flag_RFU_590nm
##     0     3  <NA> 
## 80974 29315     0 
## Flag_RFU_470nm
##     0     1     3  <NA> 
## 72935 18706 18648     0
#' Plot an interpolated depth-time heatmap of one FluoroProbe variable.
#'
#' @param fp_data data frame with Reservoir, Site, CastID, DateTime, Depth_m
#'   and the variable columns (e.g. the L1 FluoroProbe file)
#' @param reservoir reservoir code: "FCR", "BVR", "CCR", "GWR", or "SHR"
#' @param year calendar year to plot
#' @param site numeric site ID (e.g. 50)
#' @param z name of the column to plot, as a string (e.g. "TotalConc_ugL")
#' @return invisibly, nothing useful; called for its side effect of printing
#'   the ggplot heatmap
flora_heatmap <- function(fp_data, reservoir, year, site, z){
  
  # Subset to the reservoir/year/site of interest plus the variable to plot.
  # z is a string, so wrap it in all_of() — passing a bare external vector
  # to select() is deprecated in tidyselect >= 1.1.0.
  fp <- fp_data %>%
    filter(Reservoir == reservoir & year(DateTime) == year & Site == site) %>%
    select(CastID, DateTime, Depth_m, all_of(z))
  
  # Maximum slicing depth (m) per reservoir. The previous version repeated
  # the whole slicing loop once per reservoir; the branches differed only
  # in this number, and an unrecognized reservoir failed later with an
  # obscure "object 'df.final' not found" error.
  max_depth <- c(FCR = 9.3, BVR = 10, CCR = 20, GWR = 12, SHR = 30)
  if (!reservoir %in% names(max_depth)) {
    stop("Unknown reservoir: ", reservoir, call. = FALSE)
  }
  depths <- seq(0.1, max_depth[[reservoir]], by = 0.3)
  
  # For each target depth, keep the single observation within each cast
  # that is closest to that depth, then stack all the layers together
  # (one bind_rows() call instead of growing a data frame in a loop).
  layers <- lapply(depths, function(d) {
    fp %>%
      group_by(CastID) %>%
      slice(which.min(abs(as.numeric(Depth_m) - d))) %>%
      ungroup()
  })
  df.final <- bind_rows(layers)
  
  # Re-arrange the data frame by date
  fp_new <- arrange(df.final, DateTime)
  
  # Round each extracted depth to the nearest 10th. `digits` must be a
  # whole number; the old `digits = 0.5` only worked by accident via R's
  # internal rounding of the digits argument.
  fp_new$Depth_m <- round(as.numeric(fp_new$Depth_m), digits = 1)
  
  # Convert to day of year for the x axis
  fp_new$DOY <- yday(fp_new$DateTime)
  
  fig_title <- paste(reservoir, year, "Site", site, z, sep = " ")
  
  # Interpolate onto a regular DOY x depth grid for plotting; the
  # "collinear points" warnings from interp() are resolved by its own
  # jitter and are expected with repeated depths
  interp <- interp(x = fp_new$DOY, y = fp_new$Depth_m, z = unlist(fp_new[z]),
                   xo = seq(min(fp_new$DOY), max(fp_new$DOY), by = .1),
                   yo = seq(min(fp_new$Depth_m), max(fp_new$Depth_m), by = 0.01),
                   extrap = TRUE, linear = TRUE, duplicate = "strip")
  interp <- interp2xyz(interp, data.frame = TRUE)
  
  p1 <- ggplot(interp, aes(x = x, y = y)) +
    geom_raster(aes(fill = z)) +
    scale_y_reverse(expand = c(0, 0)) +       # depth increases downward
    scale_x_continuous(expand = c(0, 0)) +
    scale_fill_gradientn(colours = blue2green2red(60), na.value = "gray") +
    labs(x = "Day of year", y = "Depth (m)", title = fig_title,
         fill = expression(paste(mu, g/L))) +
    theme_bw()
  
  print(p1)
}
# Plot the heatmap for every reservoir/year combination being inspected.
# The interp() "collinear points ... jitter" warnings captured below are
# reported as resolved by interp itself ("success: collinearities reduced").
flora_heatmap(fp_data = current_df, reservoir = "FCR", year = 2023, site = 50, z = "TotalConc_ugL")
## Warning in interp(x = fp_new$DOY, y = fp_new$Depth_m, z = unlist(fp_new[z]), :
## collinear points, trying to add some jitter to avoid colinearities!
## Warning in interp(x = fp_new$DOY, y = fp_new$Depth_m, z = unlist(fp_new[z]), :
## success: collinearities reduced through jitter

flora_heatmap(fp_data = current_df, reservoir = "BVR", year = 2023, site = 50, z = "TotalConc_ugL")
## Warning in interp(x = fp_new$DOY, y = fp_new$Depth_m, z = unlist(fp_new[z]), :
## collinear points, trying to add some jitter to avoid colinearities!

## Warning in interp(x = fp_new$DOY, y = fp_new$Depth_m, z = unlist(fp_new[z]), :
## success: collinearities reduced through jitter

flora_heatmap(fp_data = current_df, reservoir = "CCR", year = 2023, site = 50, z = "TotalConc_ugL")
## Warning in interp(x = fp_new$DOY, y = fp_new$Depth_m, z = unlist(fp_new[z]), :
## collinear points, trying to add some jitter to avoid colinearities!

## Warning in interp(x = fp_new$DOY, y = fp_new$Depth_m, z = unlist(fp_new[z]), :
## success: collinearities reduced through jitter

# 2016 casts, including the reservoirs only sampled that year (SHR, GWR)
flora_heatmap(fp_data = current_df, reservoir = "FCR", year = 2016, site = 50, z = "TotalConc_ugL")
## Warning in interp(x = fp_new$DOY, y = fp_new$Depth_m, z = unlist(fp_new[z]), :
## collinear points, trying to add some jitter to avoid colinearities!
## Warning in interp(x = fp_new$DOY, y = fp_new$Depth_m, z = unlist(fp_new[z]), :
## success: collinearities reduced through jitter

flora_heatmap(fp_data = current_df, reservoir = "BVR", year = 2016, site = 50, z = "TotalConc_ugL")
## Warning in interp(x = fp_new$DOY, y = fp_new$Depth_m, z = unlist(fp_new[z]), :
## collinear points, trying to add some jitter to avoid colinearities!

## Warning in interp(x = fp_new$DOY, y = fp_new$Depth_m, z = unlist(fp_new[z]), :
## success: collinearities reduced through jitter

flora_heatmap(fp_data = current_df, reservoir = "CCR", year = 2016, site = 50, z = "TotalConc_ugL")
## Warning in interp(x = fp_new$DOY, y = fp_new$Depth_m, z = unlist(fp_new[z]), :
## collinear points, trying to add some jitter to avoid colinearities!

## Warning in interp(x = fp_new$DOY, y = fp_new$Depth_m, z = unlist(fp_new[z]), :
## success: collinearities reduced through jitter

flora_heatmap(fp_data = current_df, reservoir = "SHR", year = 2016, site = 50, z = "TotalConc_ugL")
## Warning in interp(x = fp_new$DOY, y = fp_new$Depth_m, z = unlist(fp_new[z]), :
## collinear points, trying to add some jitter to avoid colinearities!

## Warning in interp(x = fp_new$DOY, y = fp_new$Depth_m, z = unlist(fp_new[z]), :
## success: collinearities reduced through jitter

flora_heatmap(fp_data = current_df, reservoir = "GWR", year = 2016, site = 50, z = "TotalConc_ugL")
## Warning in interp(x = fp_new$DOY, y = fp_new$Depth_m, z = unlist(fp_new[z]), :
## collinear points, trying to add some jitter to avoid colinearities!

## Warning in interp(x = fp_new$DOY, y = fp_new$Depth_m, z = unlist(fp_new[z]), :
## success: collinearities reduced through jitter

Check to make sure that what is in the maintenance log was actually removed

Look at the last rows of the maintenance log

We want to make sure that our maintenance log actually worked and removed or changed the values it was supposed to.

## Rows: 17 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): Reservoir, DataStream, TIMESTAMP_start, TIMESTAMP_end, start_parame...
## dbl (2): Site, flag
## lgl (2): Depth, update_value
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## # A tibble: 6 × 11
##   Reservoir  Site Depth DataStream TIMESTAMP_start TIMESTAMP_end start_parameter
##   <chr>     <dbl> <lgl> <chr>      <chr>           <chr>         <chr>          
## 1 BVR          50 NA    Flora      2019-09-11 00:… 2019-09-11 2… Temp_C         
## 2 FCR          50 NA    Flora      2019-09-02 00:… 2019-09-02 2… Temp_C         
## 3 FCR          50 NA    Flora      2019-08-28 00:… 2019-12-31 2… Transmission_p…
## 4 BVR          50 NA    Flora      2019-08-28 00:… 2019-12-31 2… Transmission_p…
## 5 CCR          50 NA    Flora      2019-08-28 00:… 2019-12-31 2… Transmission_p…
## 6 FCR          50 NA    Flora      2020-08-24 00:… 2020-08-24 2… Temp_C         
## # ℹ 4 more variables: end_parameter <chr>, flag <dbl>, update_value <lgl>,
## #   notes <chr>
Reservoir Site Depth DataStream TIMESTAMP_start TIMESTAMP_end start_parameter end_parameter flag update_value notes
BVR 50 NA Flora 2019-09-11 00:00:00 EDT 2019-09-11 23:59:59 EDT Temp_C Temp_C 2 NA bad temperature data
FCR 50 NA Flora 2019-09-02 00:00:00 EDT 2019-09-02 23:59:59 EDT Temp_C Temp_C 2 NA bad temperature data
FCR 50 NA Flora 2019-08-28 00:00:00 EDT 2019-12-31 23:59:59 EDT Transmission_perc Transmission_perc 2 NA bad transmission data (all 100%)
BVR 50 NA Flora 2019-08-28 00:00:00 EDT 2019-12-31 23:59:59 EDT Transmission_perc Transmission_perc 2 NA bad transmission data (all 100%)
CCR 50 NA Flora 2019-08-28 00:00:00 EDT 2019-12-31 23:59:59 EDT Transmission_perc Transmission_perc 2 NA bad transmission data (all 100%)
FCR 50 NA Flora 2020-08-24 00:00:00 EDT 2020-08-24 23:59:59 EDT Temp_C Temp_C 2 NA bad temperature data

Check that the columns have flags

Look at the first few rows of the data frame and check that the observations after the TIMESTAMP_start are flagged

Look at the first 5 rows for that time

## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
##   # Was:
##   data %>% select(colname_start)
## 
##   # Now:
##   data %>% select(all_of(colname_start))
## 
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
##   # Was:
##   data %>% select(colname_end)
## 
##   # Now:
##   data %>% select(all_of(colname_end))
## 
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
Reservoir DateTime Temp_C Flag_Temp_C
FCR 2020-08-24 15:53:03 NA 2
FCR 2020-08-24 15:53:05 NA 2
FCR 2020-08-24 15:53:07 NA 2
FCR 2020-08-24 15:53:10 NA 2
FCR 2020-08-24 15:53:12 NA 2
FCR 2020-08-24 15:53:15 NA 2

Look at the last 6 rows for the maintenance time

Make sure the observations are flagged

Reservoir DateTime Temp_C Flag_Temp_C
FCR 2020-08-24 15:55:06 NA 2
FCR 2020-08-24 15:55:09 NA 2
FCR 2020-08-24 15:55:11 NA 2
FCR 2020-08-24 15:55:14 NA 2
FCR 2020-08-24 15:55:16 NA 2
FCR 2020-08-24 15:55:18 NA 2

Make site description file

 # These lines of code make the csv of the site descriptions with lat and long
 # MEL You don't need to run this if you already have the file I believe?

  # # Use Gsheet because you don't need to authenticate it. 
  # sites <- gsheet::gsheet2tbl("https://docs.google.com/spreadsheets/d/1TlQRdjmi_lzwFfQ6Ovv1CAozmCEkHumDmbg_L4A2e-8/edit#gid=1244423834")
  # #data<- read_csv("YOUR DATA.csv")# Use this if you read in a csv
  # data <- current_df #This is the line you need to modify!
  # trim_sites = function(data,sites){
  #   data_res_site=data%>% #Create a Reservoir/Site combo column
  #     mutate(res_site = trimws(paste0(Reservoir,Site)))
  #   sites_merged = sites%>% #Filter to Sites that are in the dataframe
  #     mutate(res_site = trimws(paste0(Reservoir,Site)))%>%
  #     filter(res_site%in%data_res_site$res_site)%>%
  #     select(-res_site)
  # }
  # sites_trimmed = trim_sites(data,sites) 
  # write.csv(sites_trimmed,"site_descriptions.csv", row.names=F)# Write to file